suppressPackageStartupMessages(library(PARE))
## Warning: package 'ggplot2' was built under R version 3.3.2
#setwd("~/Dropbox/Publications/ROC_Alternative")
setwd("~/Dropbox/PARE/Archive")
dir.create("Figures", showWarnings = FALSE)
saveFigure <- function(description1, description2)
{
ggsave(paste("Figures/", gsub(" ", "_", description1), "__", description2, ".png", sep=""), units="in")
}
generateNumericScores = function(means, randomSeed=999, n=numRandomValues)
{
if (length(means) < 1)
stop("Must provide at least one mean.")
set.seed(randomSeed)
scores = NULL
for (i in 1:length(means))
scores = c(scores, rnorm(n / length(means), mean=means[i]))
return(standardize(scores))
}
generateNumericScoresImbalanced = function(means, randomSeed=0, n=numRandomValues)
{
if (length(means) != 2)
stop("Must provide exactly two means.")
set.seed(randomSeed)
scores = rnorm(n * imbalanceProportion, mean=means[1])
scores = c(scores, rnorm(n * (1 - imbalanceProportion), mean=means[2]))
return(standardize(scores))
}
standardize = function(x)
{
(x - min(x)) / (max(x) - min(x))
}
numRandomValues = 1000
imbalanceProportion = 0.95
actual = factor(c(rep(0, numRandomValues / 2), rep(1, numRandomValues / 2)))
actualImbalanced = factor(c(rep(0, numRandomValues * imbalanceProportion), rep(1, numRandomValues * (1 - imbalanceProportion))))
randomScores = generateNumericScores(0)
roundedRandomScores = round(randomScores)
mediumSignalScores = generateNumericScores(c(0, 1))
roundedMediumSignalScores = round(mediumSignalScores)
signalScores = generateNumericScores(c(0, 10))
roundedSignalScores = round(signalScores)
oppositeScores = generateNumericScores(c(10, 0))
aFewGoodScores = standardize(c(rnorm(990), rnorm(10, mean=10)))
###FIVE LEVEL SCORES####
fiveLevelScores = c()
for(i in 1:1000)
{
if(i <= 200)
{
fiveLevelScores <- append(fiveLevelScores, 0)
}
if(i > 200 & i <= 400){
fiveLevelScores <- append(fiveLevelScores, .25)
}
if(i > 400 & i <= 600)
{
fiveLevelScores <- append(fiveLevelScores, .5)
}
if(i > 600 & i <= 800)
{
fiveLevelScores <- append(fiveLevelScores, .75)
}
if(i > 800 & i <= 1000)
{
fiveLevelScores <- append(fiveLevelScores, 1)
}
}
length(fiveLevelScores)
## [1] 1000
####
strongSignalDiscreteScores = as.integer(as.logical(as.numeric(as.vector(actual))))
oppositeBinaryScores = as.integer(!as.logical(as.numeric(as.vector(actual))))
singleValueScores = rep(as.integer(levels(actual)[2]), length(actual))
mediumSignalScoresImbalanced = generateNumericScoresImbalanced(c(0, 1))
signalScoresImbalanced = generateNumericScoresImbalanced(c(0, 10))
oppositeScoresImbalanced = generateNumericScoresImbalanced(c(10, 0))
strongSignalDiscreteScoresImbalanced = as.integer(as.logical(as.numeric(as.vector(actualImbalanced))))
oppositeBinaryScoresImbalanced = as.integer(!as.logical(as.numeric(as.vector(actualImbalanced))))
plotBarPlot <- function(values, main)
{
plotData <- data.frame(Scores=values)
print(ggplot(plotData, aes(x=Scores)) + geom_bar() + theme_bw() + theme(legend.key = element_blank(), legend.title=element_blank()) + xlab("") + ylab("") + ggtitle(main))
saveFigure(main, "Barplot")
}
plotHist <- function(values, main)
{
plotData <- data.frame(Scores=values)
print(ggplot(plotData, aes(x=Scores)) + geom_histogram() + theme_bw() + theme(legend.key = element_blank(), legend.title=element_blank()) + xlab("") + ylab("") + ggtitle(main))
saveFigure(main, "Histogram")
}
plotBoxPlot <- function(scores, actual, main)
{
plotData <- data.frame(Scores=scores, Actual=actual)
print(ggplot(plotData, aes(factor(Actual), Scores)) + geom_boxplot() + theme_bw() + theme(legend.key = element_blank(), legend.title=element_blank()) + xlab("") + ylab("") + ggtitle(main))
saveFigure(main, "BoxPlot")
}
# plotBarPlot(actual, main="Class Labels")
# plotBarPlot(actualImbalanced, main="Class Labels")
# plotHist(randomScores, main="Random Scores")
## plotHist(fiveLevelScores, main = "Five Level Scores")
# plotHist(mediumSignalScores, main="Medium Signal Scores")
# plotHist(signalScores, main="Signal Scores")
# plotHist(oppositeScores, main="Opposite Scores")
# plotHist(strongSignalDiscreteScores, main="Strong Signal Discrete Scores")
# plotHist(oppositeBinaryScores, main="Opposite Binary Scores")
# plotHist(aFewGoodScores, main="A Few Good Scores")
#
# plotBoxPlot(randomScores, actual, main="Random Scores")
# plotBoxPlot(mediumSignalScores, actual, main="Medium Signal Scores")
# plotBoxPlot(signalScores, actual, main="Signal Scores")
# plotBoxPlot(oppositeScores, actual, main="Opposite Scores")
# plotBoxPlot(strongSignalDiscreteScores, actual, main="Strong Signal Discrete Scores")
# plotBoxPlot(oppositeBinaryScores, actual, main="Opposite Binary Scores")
# plotBoxPlot(aFewGoodScores, actual, main="A Few Good Scores")
# plotHist(mediumSignalScoresImbalanced, main="Medium Signal Scores Imbalanced")
# plotHist(signalScoresImbalanced, main="Signal Scores Imbalanced")
# plotHist(oppositeScoresImbalanced, main="Opposite Scores Imbalanced")
# plotHist(strongSignalDiscreteScoresImbalanced, main="Strong Signal Discrete Scores Imbalanced")
# plotHist(oppositeBinaryScoresImbalanced, main="Opposite Binary Scores Imbalanced")
#
# plotBoxPlot(mediumSignalScoresImbalanced, actualImbalanced, main="Medium Signal Scores Imbalanced")
# plotBoxPlot(signalScoresImbalanced, actualImbalanced, main="Signal Scores Imbalanced")
# plotBoxPlot(oppositeScoresImbalanced, actualImbalanced, main="Opposite Scores Imbalanced")
# plotBoxPlot(strongSignalDiscreteScoresImbalanced, actualImbalanced, main="Strong Signal Discrete Scores Imbalanced")
# plotBoxPlot(oppositeBinaryScoresImbalanced, actualImbalanced, main="Opposite Binary Scores Imbalanced")
plotROC = function(actual, scores, main)
{
rocResult = roc(scores, actual)
aucValue = auc(rocResult)
main2 = paste(main, "\n", "(AUC = ", format(aucValue, digits=3, nsmall=3), ")", sep="")
plotData <- data.frame(TPR=rocResult$tpr, FPR=rocResult$fpr)
print(ggplot(plotData, aes(x=FPR, y=TPR)) + geom_abline(color="darkgrey", linetype="dashed") + geom_line() + theme_bw() + theme(legend.key = element_blank(), legend.title=element_blank()) + xlab("False positive rate") + ylab("True positive rate") + ggtitle(main2))
saveFigure(main, "ROC")
}
#
# plotROC(actual, randomScores, "Random Scores")
# plotROC(actual, mediumSignalScores, main="Medium Signal Scores")
# plotROC(actual, round(mediumSignalScores), main="Medium Signal Scores")
# plotROC(actual, signalScores, main="Signal Scores")
# plotROC(actual, oppositeScores, main="Opposite Scores")
# plotROC(actual, aFewGoodScores, main="A Few Good Scores")
#
# plotROC(actual, strongSignalDiscreteScores, main="Strong Signal Discrete Scores")
# plotROC(actual, singleValueScores, main="Single Value Scores")
# plotROC(actual, oppositeBinaryScores, main="Opposite Binary Scores")
#
# plotROC(actualImbalanced, randomScores, main="Random Scores Imbalanced")
# plotROC(actualImbalanced, mediumSignalScoresImbalanced, main="Medium Signal Scores Imbalanced")
# plotROC(actualImbalanced, signalScoresImbalanced, main="Signal Scores Imbalanced")
# plotROC(actualImbalanced, oppositeScoresImbalanced, main="Opposite Scores Imbalanced")
# plotROC(actualImbalanced, strongSignalDiscreteScoresImbalanced, main="Strong Signal Discrete Scores Imbalanced")
# plotROC(actualImbalanced, oppositeBinaryScoresImbalanced, main="Opposite Binary Scores Imbalanced")
##TESTING###
accuracyThresholdData <- calculateScoresAtThresholds(actual, signalScores, calculateAccuracy)
pare(actual, roundedRandomScores, main="Rounded Random Scores")
## [1] 0.01414141
pare(actual, fiveLevelScores, main="5 level scores")
## [1] 1
pare(actual, roundedMediumSignalScores, main="Rounded Medium Signal Scores")
## [1] 0.4485294
pare(actual, roundedSignalScores, main="Rounded Signal Scores")
## [1] 1
pare(actual, randomScores, main="Random Scores")
## [1] 0.01347425
pare(actual, mediumSignalScores, main="Medium Signal Scores")
## [1] 0.7819924
pare(actual, signalScores, main="Strong Signal Probabilistic Scores")
## [1] 1
pare(actual, oppositeScores, main="Opposite Scores")
## [1] -1
pare(actual, aFewGoodScores, main="A Few Good Scores")
## [1] 0.5971007
pare(actual, strongSignalDiscreteScores, main="Strong Signal Discrete Scores")
## [1] 1
pare(actual, oppositeBinaryScores, main="Opposite Binary Scores")
## [1] -1
pare(actual, singleValueScores, main="Single Value Scores")
## [1] 0
pare(actualImbalanced, randomScores, main="Random Scores Imbalanced")
## [1] 0.001960238
pare(actualImbalanced, mediumSignalScoresImbalanced, main="Medium Signal Scores Imbalanced")
## [1] 0.5230887
pare(actualImbalanced, mediumSignalScores, main="Medium Signal Scores Imbalanced")
## [1] 0.421649
pare(actualImbalanced, signalScoresImbalanced, main="Signal Scores Imbalanced")
## [1] 1
pare(actualImbalanced, signalScores, main="Signal Scores Imbalanced")
## [1] 0.7922449
pare(actualImbalanced, oppositeScoresImbalanced, main="Opposite Scores Imbalanced")
## [1] -0.954
pare(actualImbalanced, strongSignalDiscreteScoresImbalanced, main="Strong Signal Discrete Scores Imbalanced")
## [1] 1
pare(actualImbalanced, oppositeBinaryScoresImbalanced, main="Opposite Binary Scores Imbalanced")
## [1] -1
#FOR LOOP
#after loop make histogram
roundedscores=NULL
scores=NULL
plotHist <- function(values, main)
{
plotData <- data.frame(Scores=values)
print(ggplot(plotData, aes(x=Scores)) + geom_histogram() + theme_bw() + theme(legend.key = element_blank(), legend.title=element_blank()) + xlab("") + ylab("") + ggtitle(main))
saveFigure(main, "Histogram")
}
for(i in 1:100)
{
print(i)
mediumSignalScores = generateNumericScores(c(0, 1),randomSeed = i)
print("medium")
print(mediumSignalScores[1:5])
roundedMediumSignalScores = round(mediumSignalScores)
print("rounded")
print(roundedMediumSignalScores[1:5])
score = pare(actual, mediumSignalScores, plot = FALSE)
scores = c(scores, score)
roundedscore = pare(actual, roundedMediumSignalScores, plot = FALSE)
roundedscores = c(roundedscores, roundedscore)
}
## [1] 1
## [1] "medium"
## [1] 0.3371553 0.4518384 0.3075431 0.6516797 0.4724880
## [1] "rounded"
## [1] 0 0 0 1 0
## [1] 2
## [1] "medium"
## [1] 0.2711337 0.4318562 0.6403055 0.2364474 0.3924689
## [1] "rounded"
## [1] 0 0 1 0 0
## [1] 3
## [1] "medium"
## [1] 0.2721156 0.3790296 0.4670823 0.2417381 0.4570194
## [1] "rounded"
## [1] 0 0 0 0 0
## [1] 4
## [1] "medium"
## [1] 0.4680242 0.3517588 0.5712950 0.5260959 0.6852980
## [1] "rounded"
## [1] 0 0 1 1 1
## [1] 5
## [1] "medium"
## [1] 0.3363578 0.6180330 0.2838717 0.4516751 0.6594361
## [1] "rounded"
## [1] 0 1 0 0 1
## [1] 6
## [1] "medium"
## [1] 0.5135343 0.4032511 0.5869739 0.6922240 0.4834479
## [1] "rounded"
## [1] 1 0 1 1 0
## [1] 7
## [1] "medium"
## [1] 0.7579785 0.2559711 0.3283726 0.3690055 0.2885493
## [1] "rounded"
## [1] 1 0 0 0 0
## [1] 8
## [1] "medium"
## [1] 0.4190082 0.5512896 0.3648225 0.3523304 0.5363652
## [1] "rounded"
## [1] 0 1 0 0 1
## [1] 9
## [1] "medium"
## [1] 0.3375731 0.3302017 0.4303811 0.4101842 0.5161507
## [1] "rounded"
## [1] 0 0 0 0 1
## [1] 10
## [1] "medium"
## [1] 0.4004175 0.3735037 0.2161200 0.3184940 0.4369831
## [1] "rounded"
## [1] 0 0 0 0 0
## [1] 11
## [1] "medium"
## [1] 0.2947397 0.3770700 0.1713662 0.1918813 0.5306193
## [1] "rounded"
## [1] 0 0 0 0 1
## [1] 12
## [1] "medium"
## [1] 0.2122855 0.6433356 0.2861290 0.2913082 0.1393934
## [1] "rounded"
## [1] 0 1 0 0 0
## [1] 13
## [1] "medium"
## [1] 0.4743019 0.3578437 0.6446548 0.4230905 0.5563779
## [1] "rounded"
## [1] 0 0 1 0 1
## [1] 14
## [1] "medium"
## [1] 0.3240565 0.6751224 0.7345052 0.6424164 0.4163216
## [1] "rounded"
## [1] 0 1 1 1 0
## [1] 15
## [1] "medium"
## [1] 0.5168696 0.6986966 0.4476634 0.5906940 0.5433745
## [1] "rounded"
## [1] 1 1 0 1 1
## [1] 16
## [1] "medium"
## [1] 0.4469078 0.3636447 0.5326627 0.1811708 0.5398038
## [1] "rounded"
## [1] 0 0 1 0 1
## [1] 17
## [1] "medium"
## [1] 0.2991011 0.4091982 0.3911483 0.3223760 0.5094500
## [1] "rounded"
## [1] 0 0 0 0 1
## [1] 18
## [1] "medium"
## [1] 0.6320261 0.7535540 0.2880585 0.4677628 0.4600397
## [1] "rounded"
## [1] 1 1 0 0 0
## [1] 19
## [1] "medium"
## [1] 0.2685481 0.4993182 0.3921376 0.3623688 0.5859035
## [1] "rounded"
## [1] 0 0 0 0 1
## [1] 20
## [1] "medium"
## [1] 0.6124072 0.3481538 0.7065229 0.2353157 0.3692138
## [1] "rounded"
## [1] 1 0 1 0 0
## [1] 21
## [1] "medium"
## [1] 0.5332014 0.4945666 0.6692141 0.2386411 0.7335908
## [1] "rounded"
## [1] 1 0 1 0 1
## [1] 22
## [1] "medium"
## [1] 0.4017800 0.7834907 0.5953484 0.5042913 0.4403901
## [1] "rounded"
## [1] 0 1 1 1 0
## [1] 23
## [1] "medium"
## [1] 0.4650436 0.3745227 0.5688510 0.6957344 0.5808655
## [1] "rounded"
## [1] 0 0 1 1 1
## [1] 24
## [1] "medium"
## [1] 0.4694173 0.5989201 0.5849271 0.4649014 0.6361122
## [1] "rounded"
## [1] 0 1 1 0 1
## [1] 25
## [1] "medium"
## [1] 0.3898544 0.2621086 0.2449092 0.4719690 0.1915139
## [1] "rounded"
## [1] 0 0 0 0 0
## [1] 26
## [1] "medium"
## [1] 0.1252610 0.5793137 0.3524911 0.5347703 0.3635132
## [1] "rounded"
## [1] 0 1 0 1 0
## [1] 27
## [1] "medium"
## [1] 0.7177771 0.6062380 0.3268499 0.2254632 0.2787191
## [1] "rounded"
## [1] 1 1 0 0 0
## [1] 28
## [1] "medium"
## [1] 0.1832255 0.4113086 0.2540868 0.1934225 0.4394754
## [1] "rounded"
## [1] 0 0 0 0 0
## [1] 29
## [1] "medium"
## [1] 0.2593267 0.2620522 0.4642777 0.5644621 0.2741499
## [1] "rounded"
## [1] 0 0 0 1 0
## [1] 30
## [1] "medium"
## [1] 0.3071356 0.4299158 0.4072163 0.6414809 0.7133937
## [1] "rounded"
## [1] 0 0 0 1 1
## [1] 31
## [1] "medium"
## [1] 0.4290654 0.3933074 0.6587236 0.5646461 0.6453790
## [1] "rounded"
## [1] 0 0 1 1 1
## [1] 32
## [1] "medium"
## [1] 0.4139477 0.5411317 0.2595184 0.5133406 0.4783503
## [1] "rounded"
## [1] 0 1 0 1 0
## [1] 33
## [1] "medium"
## [1] 0.4000888 0.4132339 0.5585123 0.3970020 0.1208575
## [1] "rounded"
## [1] 0 0 1 0 0
## [1] 34
## [1] "medium"
## [1] 0.4625625 0.6494545 0.3775654 0.4016440 0.4451547
## [1] "rounded"
## [1] 0 1 0 0 0
## [1] 35
## [1] "medium"
## [1] 0.5577762 0.4139569 0.3882050 0.3865184 0.9083925
## [1] "rounded"
## [1] 1 0 0 0 1
## [1] 36
## [1] "medium"
## [1] 0.5429214 0.6159825 0.5963905 0.7314060 0.3178987
## [1] "rounded"
## [1] 1 1 1 1 0
## [1] 37
## [1] "medium"
## [1] 0.4354863 0.4748527 0.5050166 0.3714615 0.2896752
## [1] "rounded"
## [1] 0 0 1 0 0
## [1] 38
## [1] "medium"
## [1] 0.3816349 0.2477719 0.5385440 0.4281605 0.1449161
## [1] "rounded"
## [1] 0 0 1 0 0
## [1] 39
## [1] "medium"
## [1] 0.3733282 0.2089489 0.3352703 0.3086876 0.4761584
## [1] "rounded"
## [1] 0 0 0 0 0
## [1] 40
## [1] "medium"
## [1] 0.4862684 0.4890581 0.2839897 0.2886067 0.3653673
## [1] "rounded"
## [1] 0 0 0 0 0
## [1] 41
## [1] "medium"
## [1] 0.2655503 0.4219849 0.5488910 0.5941860 0.5337542
## [1] "rounded"
## [1] 0 0 1 1 1
## [1] 42
## [1] "medium"
## [1] 0.5827749 0.3242874 0.4481893 0.4842096 0.4536832
## [1] "rounded"
## [1] 1 0 0 0 0
## [1] 43
## [1] "medium"
## [1] 0.4254269 0.2084040 0.3621094 0.4964035 0.3030732
## [1] "rounded"
## [1] 0 0 0 0 0
## [1] 44
## [1] "medium"
## [1] 0.5500973 0.4600524 0.1950299 0.4385200 0.2873185
## [1] "rounded"
## [1] 1 0 0 0 0
## [1] 45
## [1] "medium"
## [1] 0.4793294 0.3225151 0.3711454 0.3161011 0.2932639
## [1] "rounded"
## [1] 0 0 0 0 0
## [1] 46
## [1] "medium"
## [1] 0.3145676 0.4744859 0.3390835 0.6217871 0.6121895
## [1] "rounded"
## [1] 0 0 0 1 1
## [1] 47
## [1] "medium"
## [1] 0.7306521 0.5431876 0.4664031 0.3981724 0.4552112
## [1] "rounded"
## [1] 1 1 0 0 0
## [1] 48
## [1] "medium"
## [1] 0.46317233 0.01125765 0.32736398 0.74762093 0.55271837
## [1] "rounded"
## [1] 0 0 0 1 1
## [1] 49
## [1] "medium"
## [1] 0.3287107 0.2947375 0.1597188 0.2948132 0.3814869
## [1] "rounded"
## [1] 0 0 0 0 0
## [1] 50
## [1] "medium"
## [1] 0.5043491 0.3004881 0.4286420 0.5006096 0.1706640
## [1] "rounded"
## [1] 1 0 0 1 0
## [1] 51
## [1] "medium"
## [1] 0.5082416 0.3329953 0.2982997 0.4973810 0.6629855
## [1] "rounded"
## [1] 1 0 0 0 1
## [1] 52
## [1] "medium"
## [1] 0.2847150 0.3785440 0.6488226 0.5926913 0.1027350
## [1] "rounded"
## [1] 0 0 1 1 0
## [1] 53
## [1] "medium"
## [1] 0.3971411 0.1783345 0.4602690 0.1541075 0.2548566
## [1] "rounded"
## [1] 0 0 0 0 0
## [1] 54
## [1] "medium"
## [1] 0.7006199 0.5015610 0.3784414 0.6629105 0.5978144
## [1] "rounded"
## [1] 1 1 0 1 1
## [1] 55
## [1] "medium"
## [1] 0.4228524 0.1668414 0.4270180 0.2586676 0.4071897
## [1] "rounded"
## [1] 0 0 0 0 0
## [1] 56
## [1] "medium"
## [1] 0.4166568 0.3794216 0.3945142 0.3371048 0.5287283
## [1] "rounded"
## [1] 0 0 0 0 1
## [1] 57
## [1] "medium"
## [1] 0.3993372 0.2461315 0.5872714 0.7863189 0.5185109
## [1] "rounded"
## [1] 0 0 1 1 1
## [1] 58
## [1] "medium"
## [1] 0.3624350 0.4977899 0.5827488 0.3230950 0.3364038
## [1] "rounded"
## [1] 0 0 1 0 0
## [1] 59
## [1] "medium"
## [1] 0.1910268 0.6085948 0.4942300 0.6447679 0.7613007
## [1] "rounded"
## [1] 0 1 0 1 1
## [1] 60
## [1] "medium"
## [1] 0.4573825 0.4275040 0.2819966 0.3390485 0.1252399
## [1] "rounded"
## [1] 0 0 0 0 0
## [1] 61
## [1] "medium"
## [1] 0.4036496 0.4044243 0.2214658 0.5030164 0.2672705
## [1] "rounded"
## [1] 0 0 0 1 0
## [1] 62
## [1] "medium"
## [1] 0.5667052 0.4908048 0.2268559 0.4599903 0.7810471
## [1] "rounded"
## [1] 1 0 0 0 1
## [1] 63
## [1] "medium"
## [1] 0.6326976 0.1967980 0.5193080 0.1931500 0.5267513
## [1] "rounded"
## [1] 1 0 1 0 1
## [1] 64
## [1] "medium"
## [1] 0.1737234 0.1729643 0.2301401 0.6741412 0.4391473
## [1] "rounded"
## [1] 0 0 0 1 0
## [1] 65
## [1] "medium"
## [1] 0.2178999 0.2500654 0.4114886 0.1853450 0.4827355
## [1] "rounded"
## [1] 0 0 0 0 0
## [1] 66
## [1] "medium"
## [1] 0.8113104 0.5181971 0.5461890 0.4614427 0.4442140
## [1] "rounded"
## [1] 1 1 1 0 0
## [1] 67
## [1] "medium"
## [1] 0.6255083 0.4277713 0.3051831 0.4344269 0.2778266
## [1] "rounded"
## [1] 1 0 0 0 0
## [1] 68
## [1] "medium"
## [1] 0.6108939 0.3596067 0.3400315 0.4824379 0.3321055
## [1] "rounded"
## [1] 1 0 0 0 0
## [1] 69
## [1] "medium"
## [1] 0.4122090 0.4597886 0.3462431 0.2477576 0.2493074
## [1] "rounded"
## [1] 0 0 0 0 0
## [1] 70
## [1] "medium"
## [1] 0.2355115 0.5106664 0.6202929 0.3874894 0.3234229
## [1] "rounded"
## [1] 0 1 1 0 0
## [1] 71
## [1] "medium"
## [1] 0.3058441 0.3037859 0.2995763 0.4197169 0.3077141
## [1] "rounded"
## [1] 0 0 0 0 0
## [1] 72
## [1] "medium"
## [1] 0.5847244 0.5528491 0.3824045 0.5339079 0.4735302
## [1] "rounded"
## [1] 1 1 0 1 0
## [1] 73
## [1] "medium"
## [1] 0.3876194 0.4459064 0.4195199 0.3899806 0.2938878
## [1] "rounded"
## [1] 0 0 0 0 0
## [1] 74
## [1] "medium"
## [1] 0.5108251 0.3115602 0.5948780 0.1514329 0.5100390
## [1] "rounded"
## [1] 1 0 1 0 1
## [1] 75
## [1] "medium"
## [1] 0.3170214 0.5176909 0.2159344 0.4619589 0.6938858
## [1] "rounded"
## [1] 0 1 0 0 1
## [1] 76
## [1] "medium"
## [1] 0.4997974 0.4332172 0.4456444 0.3859088 0.3938313
## [1] "rounded"
## [1] 0 0 0 0 0
## [1] 77
## [1] "medium"
## [1] 0.4005040 0.6302676 0.5670705 0.6234787 0.5012410
## [1] "rounded"
## [1] 0 1 1 1 1
## [1] 78
## [1] "medium"
## [1] 0.5536531 0.4880176 0.6047998 0.3742645 0.2614919
## [1] "rounded"
## [1] 1 0 1 0 0
## [1] 79
## [1] "medium"
## [1] 0.5332205 0.4584436 0.4203129 0.2774299 0.5302765
## [1] "rounded"
## [1] 1 0 0 0 1
## [1] 80
## [1] "medium"
## [1] 0.4065071 0.5321222 0.4620362 0.4865798 0.4578487
## [1] "rounded"
## [1] 0 1 0 0 0
## [1] 81
## [1] "medium"
## [1] 0.2941805 0.5070636 0.5594574 0.5181240 0.4809344
## [1] "rounded"
## [1] 0 1 1 1 0
## [1] 82
## [1] "medium"
## [1] 0.2130680 0.4679829 0.3618905 0.1823262 0.4542396
## [1] "rounded"
## [1] 0 0 0 0 0
## [1] 83
## [1] "medium"
## [1] 0.1030755 0.4220209 0.4063151 0.4391633 0.5617963
## [1] "rounded"
## [1] 0 0 0 0 1
## [1] 84
## [1] "medium"
## [1] 0.4950928 0.5515768 0.2984767 0.2447140 0.4328430
## [1] "rounded"
## [1] 0 1 0 0 0
## [1] 85
## [1] "medium"
## [1] 0.3699969 0.2997930 0.3958334 0.2527577 0.2375268
## [1] "rounded"
## [1] 0 0 0 0 0
## [1] 86
## [1] "medium"
## [1] 0.5080792 0.5968600 0.3872603 0.3356937 0.4968043
## [1] "rounded"
## [1] 1 1 0 0 0
## [1] 87
## [1] "medium"
## [1] 0.1203732 0.1785968 0.1579341 0.2906094 0.6720344
## [1] "rounded"
## [1] 0 0 0 0 1
## [1] 88
## [1] "medium"
## [1] 0.4139370 0.5326848 0.7638999 0.1936229 0.5072600
## [1] "rounded"
## [1] 0 1 1 0 1
## [1] 89
## [1] "medium"
## [1] 0.1863272 0.4686673 0.5843639 0.2665969 0.6283382
## [1] "rounded"
## [1] 0 0 1 0 1
## [1] 90
## [1] "medium"
## [1] 0.4596183 0.4252787 0.3149947 0.3395911 0.5594529
## [1] "rounded"
## [1] 0 0 0 0 1
## [1] 91
## [1] "medium"
## [1] 0.4363929 0.4330801 0.6951001 0.7852607 0.2995693
## [1] "rounded"
## [1] 0 0 1 1 0
## [1] 92
## [1] "medium"
## [1] 0.2423447 0.3765686 0.5718734 0.2820884 0.2058889
## [1] "rounded"
## [1] 0 0 1 0 0
## [1] 93
## [1] "medium"
## [1] 0.3712926 0.3264916 0.5061689 0.5334663 0.5215936
## [1] "rounded"
## [1] 0 0 1 1 1
## [1] 94
## [1] "medium"
## [1] 0.6135463 0.4977264 0.2313514 0.6283846 0.2718537
## [1] "rounded"
## [1] 1 0 0 1 0
## [1] 95
## [1] "medium"
## [1] 0.2912852 0.2006192 0.4460903 0.4007502 0.7411311
## [1] "rounded"
## [1] 0 0 0 0 1
## [1] 96
## [1] "medium"
## [1] 0.4287672 0.6225008 0.6861788 0.6153534 0.3982655
## [1] "rounded"
## [1] 0 1 1 1 0
## [1] 97
## [1] "medium"
## [1] 0.2780237 0.7729947 0.6314076 0.7413485 0.4341678
## [1] "rounded"
## [1] 0 1 1 1 0
## [1] 98
## [1] "medium"
## [1] 0.4147221 0.3674890 0.3345075 0.1715653 0.5037293
## [1] "rounded"
## [1] 0 0 0 0 1
## [1] 99
## [1] "medium"
## [1] 0.4274659 0.4622005 0.4109764 0.4575204 0.3520605
## [1] "rounded"
## [1] 0 0 0 0 0
## [1] 100
## [1] "medium"
## [1] 0.3802209 0.4657087 0.4373197 0.5675906 0.4637446
## [1] "rounded"
## [1] 0 0 0 1 0
plotHist(scores, main="Medium Signal Scores")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Saving 7 x 5 in image
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
plotHist(roundedscores, main = "Medium Signal Rounded Scores")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Saving 7 x 5 in image
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
plotData <- data.frame(Numeric=scores, Rounded=roundedscores)
print(ggplot(plotData, aes(x=Numeric, y=Rounded)) + geom_point() + theme_bw() + theme(legend.key = element_blank(), legend.title=element_blank()))
saveFigure("Numeric", "Rounded")
## Saving 7 x 5 in image
compareAUCvPARE = function(meanOptions, iterations=20)
{
aucScores = NULL
pareScores = NULL
for (i in 1:iterations)
{
message(paste("Iteration ", i, "/", iterations, sep=""))
for (j in 1:length(meanOptions))
{
meanOption = meanOptions[j]
predictors = generateNumericScores(c(0, meanOption), i*j)
aucScores = c(aucScores, auc(roc(predictors, actual)))
pareScore <-pare(actual, predictors, plot=FALSE)
pareScores = c(pareScores, pareScore)
}
}
hist(aucScores, main=paste("AUC:", format(mean(aucScores), digits=3, nsmall=3)))
hist(pareScores, main=paste("PARE:", format(mean(pareScores), digits=3, nsmall=3)))
plotData <- data.frame(AUC=aucScores, PARE=pareScores)
print(ggplot(plotData, aes(x=AUC, y=PARE)) + geom_point() + theme_bw() + theme(legend.key = element_blank(), legend.title=element_blank()))
saveFigure("AUC", "PARE")
}
#compareAUCvPARE(rep(0, 100))
compareAUCvPARE(rnorm(5), iterations=5)
## Iteration 1/5
## Iteration 2/5
## Iteration 3/5
## Iteration 4/5
## Iteration 5/5
## Saving 7 x 5 in image
#compareAUCvPARE(rnorm(100), iterations=100)
generateNumericScores = function(means, randomSeed=999, n=numRandomValues)
{
if (length(means) < 1)
stop("Must provide at least one mean.")
set.seed(randomSeed)
scores = NULL
for (i in 1:length(means))
scores = c(scores, rnorm(n / length(means), mean=means[i]))
return(standardize(scores))
}
randomScores = sort(generateNumericScores(0))
hist(randomScores)
signalScores = sort(generateNumericScores(c(0, 10)))
hist(signalScores)
calibratedOutput = isoreg(signalScores)
plot(calibratedOutput)
plot(calibratedOutput$x, calibratedOutput$yf, ylim=c(0,1))
plot(calibratedOutput$x, calibratedOutput$yc[-1])
***************************************************** ** Stop here *****************************************************
library(mlr)
data(iris)
task = makeClassifTask(id = “tutorial”, data = iris, target = “Species”)
n = getTaskSize(task) train.set = seq(1, n, by = 2) test.set = seq(2, n, by = 2)
task = makeClassifTask(id = “tutorial”, data = iris, target = “Species”) lrn = makeLearner(“classif.lda”, predict.type = “prob”) mod = train(lrn, task, subset = train.set) pred = predict(mod, task = task, subset = test.set)
performance(pred, measures = list(multiclass.auc)) #```
generateNumericScoresMultipleClasses = function(means, randomSeed=0, n=numRandomValues, defaultMean=0) { if (length(means) < 1) stop(“Must provide at least one mean.”)
set.seed(randomSeed)
scores = NULL for (i in 1:length(means)) { numValues = n / length(means)
scoresThisClass = NULL
for (j in 1:length(means))
{
if (i == j) {
scoresThisClass = c(scoresThisClass, rnorm(numValues, mean=means[i]))
} else {
scoresThisClass = c(scoresThisClass, rnorm(numValues, mean=defaultMean))
}
}
scores = cbind(scores, scoresThisClass)
}
scores = t(apply(scores, 1, function(x) { x / sum(x) }))
colnames(scores) = 0:(length(means) - 1)
return(scores) } #```
library(pROC)
plotMultiRoc = function(actual, scores, main) { rocResult = multiclass.roc(actual, scores) print(rocResult) # aucValue = auc(rocResult)
}
plotMultiRoc(actual4Classes, randomScores4Classes, main=“Random Scores - 4 Classes”) #```
actual4Classes = factor(c(rep(0, numRandomValues / 4), rep(1, numRandomValues / 4), rep(2, numRandomValues / 4), rep(3, numRandomValues / 4))) #actual4ClassesImbalanced = factor(c(rep(0, 10), rep(1, 10), rep(2, 490), rep(3, 490)))
randomScores4Classes = generateNumericScoresMultipleClasses(c(10, 10, 10, 10), n=numRandomValues, defaultMean=10) mediumSignalScores4Classes = generateNumericScoresMultipleClasses(c(11, 11, 11, 11), n=numRandomValues, defaultMean=10) signalScores4Classes = generateNumericScoresMultipleClasses(c(20, 20, 20, 20), n=numRandomValues, defaultMean=10) oppositeScores4Classes = generateNumericScoresMultipleClasses(c(5, 5, 5, 5), n=numRandomValues, defaultMean=10) signalVariesScores4Classes = generateNumericScoresMultipleClasses(c(10, 20, 10, 10), n=numRandomValues, defaultMean=10) #```
{r} pare(actual4Classes, randomScores4Classes, main="Random Scores - 4 Classes") pare(actual4Classes, signalScores4Classes, main="Signal Scores - 4 Classes") pare(actual4Classes, mediumSignalScores4Classes, main="Medium Signal Scores - 4 Classes") pare(actual4Classes, oppositeScores4Classes, main="Opposite Scores - 4 Classes") pare(actual4Classes, signalVariesScores4Classes, main="Signal Varies Scores - 4 Classes") #library(mlr)
data = data.frame(a=rnorm(150), b=rnorm(150), Class=as.factor(c(rep(0, 50), rep(1, 50), rep(2, 50))))
task = makeClassifTask(id = “test”, data = data, target = “Class”) lrn = makeLearner(“classif.lda”, predict.type = “prob”) rdesc = makeResampleDesc(method = “CV”, stratify = TRUE) r = resample(learner = lrn, task = task, resampling = rdesc, show.info = FALSE) threshold = c(“0”=0.33, “1”=0.33, “2”=0.33) predictions = setThreshold(r\(pred, threshold=threshold) print(predictions\)data) print(predictions)
performance(r$pred, measures = list(acc, multiclass.auc)) #``` #########################################################################
https://twitter.com/anshul/status/761117748126638084
Make a different version of PARE that gives a score for each individual sample. Compared to random chance for all patients of the same class, how accurate were the predictions for each sample? This is a more intuitive way to interpret the “A Few Good Scores” results shown in the PARE paper. You could then take the average of these across all samples. If you don’t include it in the grant, then write a paper based on it anyway.
One A Few Good Scores, why do the PARE scores at higher thresholds stay high? Should drop off, no?
Compare this to Cohen’s Kappa (which seems to be interpreted the same way - but doesn’t work for probabilistic classifiers?).
Need some more scenarios where we can show the value of threshold selection.
How does it work when there are only 5 distinct prediction values that are within a small range? Could change it so the thresholds are actually the predictors rather than thresholds between them.
It doesn’t work right when using “truly ideal” and there is a class imbalance. Would be a nice selling point if you can tweak it to work. Study this: http://web.cs.iastate.edu/~cs573x/Notes/hand-article.pdf
Make it so it can find threshold that maximizes two metrics. [Probably not needed.] Maximize the minimum of the two metrics across all thresholds. Can apply it to more than two metrics?
Multiclass What kind of output do you get when mlr is applied to multiclass data in mlr? Seems like the only feasible way is to average across the classes. Use the pROC package to plot multiclass.roc and compare against it. Make sure the input class labels are zero/one.
Break AUC: The ROC assumes two normal distributions. See if you can break it with something different. Try a wide range of imbalance levels and see how PARE compares against AUC. Smooth the permuted accuracy values. 2. How much difference do you see in the scores for correctly identifying 1/1000 correctly vs. 1/1000000 correctly for an extremely rare disease? 3. “Why not use log loss? That’s the metric that most of the Kaggle competitions moved to. It tests how well calibrated the probability statements of a model are in a way that neither 0/1 loss, squared error, or ROC curve metrics like mean precision don’t…The use of log on the error provides extreme punishments for being both confident and wrong. In the worst possible case, a single prediction that something is definitely true (1) when it is actually false will add infinite to your error score and make every other entry pointless. In Kaggle competitions, predictions are bounded away from the extremes by a small value in order to prevent this.” (https://www.kaggle.com/wiki/LogarithmicLoss) Report optimal cut point as max difference from permuted baseline? Handle special case where you never see greater than 0.01 above permuted baseline. Add Sensitivity and Specificity lines (grey, dotted) to the plot.
Multiclass Read these three papers in Evernote: Receiver Operating Characteristic Analysis:A Primer A Simple Generalisation of the Area Under the ROCCurve for Multiple Class Classification Problems Statistics review 13: Receiver operating characteristic curves Add option to change the “ideal” result so that it actually is the ideal (0 or 1 probabilities). * In theory, this is what you do with accuracy. * Makes it so you can compare models more fairly. http://andrewgelman.com/2016/01/30/evaluating-models-with-predictive-accuracy/ Provide a parameter that enables the user to limit the PARE calculation to a specific range of thresholds and demonstrate what this gives you. Use bootstrap subsampling to calculate confidence intervals. Should see wider intervals for smaller data sets. Merge the two plots I currently have and use ggplot2 to plot the combined plot. Use plotThreshVsPerf in mlr to create plots.
Add more error checking of input parameters on the pare function and related functions Scale input predictor values between 0 and 1 if not already in that range? Let them specify the number of thresholds and/or threshold step size as a parameter.
“A systematic analysis of performance measures for classification tasks” in Evernote. * AUC “can also be viewed as a linear transformation of Youden Index” * What is the Youden index and how is it calculated? * Invariance property (switching around the confusion matrix). Can we say that PARE meets this better than accuracy?
Try different sample sizes: 100000, 1000, 100, 10 Read this and make notes within this notebook: http://web.cs.iastate.edu/~cs573x/Notes/hand-article.pdf Can you deal with the the problem of lines crossing. http://papers.ssrn.com/sol3/papers.cfm?abstract_id=1620718 Address the question that people ask, “At what level of accuracy do you get excited?” Emphasize how you can use this method to quantify whether performance is better than clinical/baseline predictors. Can you apply it to multilabel classification problems? That’s a new angle. Can you apply it to regression problems?
A key contribution would be that it generalizes beyond two classes.
Its roots in signal processing make it more difficult for people to understand.
Here’s a list of classification measures: https://mlr-org.github.io/mlr-tutorial/tutorial/release/html/measures/index.html
One non-intuitive thing about ROC plots is that it is varying the threshold along an axis, but it is does not show that threshold explicitly on the plot. It’s also not intuitive that an AUC value of 0.5 is what you expect by chance. Also, it’s difficult to calculate the AUC. And it doesn’t work for multi-class problems. Also sensitivity and specificity are difficult for many people to understand.
What is positive about this metric is that it is robust to class imbalance and is used widely and relatively easy to interpret because it can be related loosely back to accuracy. And that it doesn’t force you to choose one specific threshold.
What this could also show you is which is the optimal threshold to use in clinical settings.
To get this into a top journal (Nature Methods), you could write this as a tutorial/review article and add your own metric at the end.
These curves seem to have two main purposes: 1) assessing the balance between sensitivity and specificity and 2) comparing classifiers. Need to address both of these needs?
Develop a tool in Python (http://scikit-learn.org/stable/modules/generated/sklearn.metrics.auc.html) and/or in R (http://stackoverflow.com/questions/4954507/calculate-the-area-under-a-curve-in-r).
See info here: http://en.wikipedia.org/wiki/Receiver_operating_characteristic.
Look at paper entitled, “Union–intersection permutation solution for two-sample equivalence testing.”
Summary of performance measures in machine learning: http://www.cs.cornell.edu/courses/cs578/2003fa/performance_measures.pdf
Nicolas Robine @notSoJunkDNA
KP: Side point: Harmonic mean of precision and recall (Fmax statistics) is better than AUC in many genomics application… #5points
https://twitter.com/notSoJunkDNA/status/580732327149596672
Harmonic mean of precision/recall is described in http://www.cs.cornell.edu/courses/cs578/2003fa/performance_measures.pdf
https://rocr.bioinf.mpi-sb.mpg.de/
Has an implementation of accuracy threshold plot. Many other metrics, too.
The precision-recall curve is an alternative. But it’s not really better. This presentation provides some theoretical background: http://www.ke.tu-darmstadt.de/lehre/archiv/ws0708/ml-sem/Folien/Wen_Zhang.pdf
Here’s another alternative along with some more theory: http://link.springer.com/article/10.1007%2Fs10994-009-5119-5
R package described here: http://www.hmeasure.net
Claims that AUC is equivalent to the “Gini coefficient”.
“For example, if ROC curves cross then it is possible that one curve has a larger AUC (and so is apparently better) even though the alternative may show superior performance over almost the entire range of values of the classification threshold (defined below) for which the curve will be used.”
“a much more fundamental weakness of the AUC which appears not to have been previously recognised. This is that, as we show below, the AUC is equivalent to measuring the performance of classification rules using metrics which depend on the rules being measured. In particular, instead of regarding the relative severities of different kinds of misclassifications (i.e., misclassifying a class 0 object as class 1, and a class 1 as class 0) as the same for different classifiers, the AUC takes these relative severities themselves to depend on the classifier being measured. This is, of course, nonsensical, since the relative severities, even if one does not know them precisely, are independent of the classifiers themselves and must be determined by factors describing the problem external to the classifiers. It is as if one chose to compare the heights of two people using rulers in which the basic units of measurement themselves depended on the heights.”
Here’s another one: http://users.dsic.upv.es/~flip/ROCAI2004/papers/03-vfinal-Drummond_and_Holte-A4.pdf They emphasize confidence intervals, which is important and should be included in our tool (bootstrap approach?).
Another: http://www.nssl.noaa.gov/users/brooks/public_html/feda/papers/Drummond%20and%20Holte.pdf
http://www.igi-global.com/article/alternative-approach-evaluating-binormal-roc/80236
http://www.jstor.org/discover/10.2307/3702911?sid=21105221364251&uid=2&uid=4&uid=3739256&uid=3739808
Use the comparison methodology described here?: http://webdocs.cs.ualberta.ca/~ozlem/papers/TR2009.pdf
Another possibility is to come up with an alternative to survival analysis: http://errorstatistics.com/2013/04/19/stephen-senn-when-relevance-is-irrelevant/